from matplotlib import rcParams
import pandas as pd
from linearmodels import OLS
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# ---------------------------------------------------------------------------
# Table E1: per-covariate comparison by treatment assignment.
#
# For each list of columns, a text file is written containing
#   1. the mean of every column grouped by `assignment_to_treatment`, then
#   2. a blank line, then
#   3. one OLS fit per column of that column on `assignment_to_control`
#      (defined as 1 - assignment_to_treatment), reporting the coefficient,
#      its standard error and its p-value.
# ---------------------------------------------------------------------------


def _write_group_means(df: pd.DataFrame, columns, out) -> None:
    """Print the mean of each column, grouped by treatment assignment.

    Args:
        df: Frame holding the columns and ``assignment_to_treatment``.
        columns: Iterable of column names to summarise, one table per column.
        out: Open, writable text file the tables are printed to.
    """
    for col in columns:
        by_arm = df[[col, 'assignment_to_treatment']].groupby(['assignment_to_treatment'])
        print(by_arm.mean(), file=out)


def _write_control_regressions(df: pd.DataFrame, columns, out) -> None:
    """Fit ``column ~ assignment_to_control`` for each column and print it.

    Rows where the column is missing are dropped before fitting, and the
    column is cast to float. Each printed table has the coefficient(s) under
    the column's name, plus ``SE`` and ``pvalues``.

    Args:
        df: Frame holding the columns and ``assignment_to_treatment``.
        columns: Iterable of column names used as dependent variables.
        out: Open, writable text file the tables are printed to.
    """
    for col in columns:
        # Explicit copy: the filtered frame is modified below, and writing to
        # a slice of `df` is the pattern behind SettingWithCopyWarning (which
        # the global warnings filter would otherwise hide).
        sample = df[df[col].notna()].copy()
        sample[col] = sample[col].astype(float)
        sample['assignment_to_control'] = 1 - sample['assignment_to_treatment']

        # NOTE(review): linearmodels formulas appear not to add an intercept
        # unless "1 +" is written explicitly. If so, this coefficient is the
        # control-group mean rather than a treatment/control difference --
        # confirm this is the intended specification.
        res = OLS.from_formula(col + ' ~ assignment_to_control', data=sample).fit()

        summary = res.params.to_frame(col)
        summary['SE'] = res.std_errors
        summary['pvalues'] = res.pvalues
        print(summary, file=out)


def _write_arm_comparison(df: pd.DataFrame, columns, path: str) -> None:
    """Write group means, a blank line, then the regressions to ``path``.

    The file is created or truncated; both sections go through one handle.
    """
    with open(path, 'w') as out:
        _write_group_means(df, columns, out)
        print("", file=out)
        _write_control_regressions(df, columns, out)


df_status = pd.read_csv('data/jesus_maria_status_deidentified.csv')

# Recode sex in place: 'M' -> 1, 'F' -> 0. Any other value is left untouched.
df_status.loc[df_status['sex']=='M', 'sex'] = 1
df_status.loc[df_status['sex']=='F', 'sex'] = 0

# Columns reported in figs/tableE1.txt.
covariates = ['Q1_total_due','Q1_predial_due', 'Q1_arbitrios_due',
              'total_due', 'predial_due', 'arbitrios_due',
              'score_exo_covariates', 'score_endo_covariates',
              'last_year_share_repaid_by_3',
              'is_pricos', 'has_employer',
              'has_education', 'has_email', 'has_cellular',
              'salary', 'age', 'is_local', 'sex']

_write_arm_comparison(df_status, covariates, 'figs/tableE1.txt')

# Columns reported in figs/tableE1_imputations.txt.
imputation_list = ['is_age_imputed', 'is_salary_imputed', 'is_last_year_share_repaid_by_3_imputed',
                   'age_unimputed', 'salary_unimputed', 'last_year_share_repaid_by_3_unimputed']

# Display options are widened only from this point on, so the first table
# above is still rendered with pandas' default settings.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

_write_arm_comparison(df_status, imputation_list, 'figs/tableE1_imputations.txt')
      
